# SPDS
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 4.0.2
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'dplyr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Warning: package 'sf' was built under R version 4.0.2
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(units)
## Warning: package 'units' was built under R version 4.0.2
## udunits system database from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/units/share/udunits
# Data
library(USAboundaries)
library(rnaturalearthdata)

# Visualization
library(gghighlight)
## Warning: package 'gghighlight' was built under R version 4.0.2
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.2
library(knitr)
## Warning: package 'knitr' was built under R version 4.0.2
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.2
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

##Question 1

#continental US States
USAboundaries::us_states(resolution = "low") %>%
  st_as_sf(coords = c("lng", "lat"), crs = 4326) %>%
  filter(!state_name %in% c("Puerto Rico",
                            "Alaska",
                            "Hawaii")) ->
  conus

#set projected coordinate system to 'eqdc'
eqdc = '+proj=eqdc +lat_0=40 +lon_0=-96 +lat_1=20 +lat_2=60 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs'

st_transform(conus, eqdc) -> conus
#country boundaries for Mexico, the United States of America, and Canada
rnaturalearthdata::countries110 -> countries110

st_as_sf(countries110, crs = 4326) -> worldboundaries

st_transform(worldboundaries, eqdc) -> worldboundaries
#loading cities from CSV file 
readr::read_csv("data/uscities.csv") %>%
  st_as_sf(coords = c("lng", "lat"), crs = 4326) %>%
  filter(!state_name %in% c("Hawaii", "Alaska", "Puerto Rico")) %>%
         st_transform(., eqdc) ->
  uscities
## Parsed with column specification:
## cols(
##   city = col_character(),
##   city_ascii = col_character(),
##   state_id = col_character(),
##   state_name = col_character(),
##   county_fips = col_double(),
##   county_name = col_character(),
##   county_fips_all = col_character(),
##   county_name_all = col_character(),
##   lat = col_double(),
##   lng = col_double(),
##   population = col_double(),
##   density = col_double(),
##   source = col_character(),
##   military = col_logical(),
##   incorporated = col_logical(),
##   timezone = col_character(),
##   ranking = col_double(),
##   zips = col_character(),
##   id = col_double()
## )

##Question 2

#distance to US borders (2.1)

st_union(conus) %>%
  st_cast("MULTILINESTRING") ->
  USboundaries

uscities %>%
  select(city, state_name) %>%
  mutate(dist = st_distance(uscities, USboundaries),
  dist = units::set_units(dist, "km"),
  dist = units::drop_units(dist)) ->
  border_dist

border_dist %>%
  slice_max(dist, n = 5) %>%
  st_drop_geometry() ->
  top5_to_border

knitr::kable(top5_to_border,
             caption = "Distance of US Cities to US Borders",
             col.names = c("City", "State", "Distance to Border")) %>%
    kable_styling(bootstrap_options = "striped")
Distance of US Cities to US Borders
City State Distance to Border
Dresden Kansas 1012.317
Herndon Kansas 1007.750
Hill City Kansas 1005.147
Atwood Kansas 1004.734
Jennings Kansas 1003.646
#2.2 calculating the distance of each city to the nearest state boundary
st_combine(conus) %>%
  st_cast("MULTILINESTRING") ->
  USboundaries2

uscities %>%
  select(city, state_name) %>%
  mutate(dist2 = st_distance(uscities, USboundaries2),
         dist2 = units::set_units(dist2, "km"),
         dist2 = units::drop_units(dist2)) ->
  border_dist2

border_dist2 %>%
  slice_max(dist2, n = 5) %>%
  st_drop_geometry() ->
  top5_to_stateborder

knitr::kable(top5_to_stateborder,
             caption = "Distance of US Cities to US State Borders",
             col.names = c("City", "State", "Distance to State Border")) %>%
    kable_styling(bootstrap_options = "striped")
Distance of US Cities to US State Borders
City State Distance to State Border
Lampasas Texas 308.9216
Bertram Texas 302.8190
Kempner Texas 302.5912
Harker Heights Texas 298.8125
Florence Texas 298.6804
#2.3 calculating the distance of each city to the Mexican Border
worldboundaries %>%
  st_cast("MULTILINESTRING") %>%
  filter(admin == "Mexico") ->
  mexico_border

uscities %>%
  select(city, state_name) %>%
  mutate(dist3 = st_distance(uscities, mexico_border),
         dist3 = units::set_units(dist3, "km"),
         dist3 = units::drop_units(dist3)) ->
  border_dist3

border_dist3 %>%
  slice_max(dist3, n = 5) %>%
  st_drop_geometry() ->
  top5_to_mexico

knitr::kable(top5_to_mexico,
             caption = "Distance of US Cities to Mexican Border",
             col.names = c("City", "State", "Distance to Mexican Border")) %>%
    kable_styling(bootstrap_options = "striped")
Distance of US Cities to Mexican Border
City State Distance to Mexican Border
Caribou Maine 3250.334
Presque Isle Maine 3234.570
Calais Maine 3134.348
Eastport Maine 3125.624
Old Town Maine 3048.366
#2.4 calculating the distance of each city to the Canadian border
worldboundaries %>%
  st_cast("MULTILINESTRING") %>%
  filter(admin == "Canada") ->
  canada_border

uscities %>%
  select(city, state_name) %>%
  mutate(dist4 = st_distance(uscities, canada_border),
         dist4 = units::set_units(dist4, "km"),
         dist4 = units::drop_units(dist4)) ->
  border_dist4

border_dist4 %>%
  slice_max(dist4, n = 5) %>%
  st_drop_geometry() ->
  top5_to_canada

knitr::kable(top5_to_canada,
             caption = "Distance of US Cities to Canadian Border",
             col.names = c("City", "State", "Distance to Canadian Border")) %>%
    kable_styling(bootstrap_options = "striped")
Distance of US Cities to Canadian Border
City State Distance to Canadian Border
Guadalupe Guerra Texas 2206.455
Sandoval Texas 2205.641
Fronton Texas 2204.784
Fronton Ranchettes Texas 2202.118
Evergreen Texas 2202.020

##Question 3

#3.1 10 largest USA cities (by population)
uscities %>%
  slice_max(population, n = 10) ->
  top10_pop

ggplot() +
  geom_sf(data = conus) +
  geom_sf(data = top10_pop$geometry, add = TRUE, size= 2, color = "red") +
  ggrepel::geom_label_repel(
    data = top10_pop,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3) +
  labs(title = "10 Largest US Cities by Population") +
  ggthemes::theme_map() 
## Warning: Ignoring unknown parameters: add

#3.2 City Distance from the National Border
border_dist %>%
  slice_max(dist, n = 5) ->
  top5_to_bordermap

ggplot() +
  geom_sf(data = USboundaries) +
  geom_sf(data = border_dist, aes(col = dist), size = .1) +
  geom_sf(data = top5_to_bordermap, col = "red", size = 1) +
  scale_color_gradient(low = "grey", high = "blue") +
  ggrepel::geom_label_repel(
    data = top5_to_bordermap,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3) +
  labs(title = "US Cities' Distance from National Border") +
  ggthemes::theme_map()

#3.3 City Distance from the Nearest State Border
border_dist2 %>%
  slice_max(dist2, n = 5) ->
  top5_to_statebordermap

ggplot() +
  geom_sf(data = USboundaries2) +
  geom_sf(data = border_dist2, aes(col = dist2), size = .1) +
  geom_sf(data = top5_to_statebordermap, col = "red", size = 1) +
  scale_color_gradient(low = "grey", high = "blue") +
  ggrepel::geom_label_repel(
    data = top5_to_statebordermap,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3) + 
  labs(title = "US Cities' Distance from State Borders") +
  ggthemes::theme_map()

#3.4
uscities %>%
  select(city, state_name, population) %>%
  mutate(dist3 = st_distance(uscities, mexico_border),
         dist3 = units::set_units(dist3, "km"),
         dist3 = units::drop_units(dist3)) %>%
  mutate(dist4 = st_distance(uscities, canada_border),
         dist4 = units::set_units(dist4, "km"),
         dist4 = units::drop_units(dist4)) %>%
  mutate(diffdist = abs(dist3 - dist4)) ->
  equaldistance

equaldistance %>% 
  filter(diffdist <= 100) %>%
  slice_max(population, n = 5) ->
  ED_pop

ggplot() +
  geom_sf(data = USboundaries2) +
  geom_sf(data = equaldistance, aes(col = diffdist), size = .1) +
  scale_color_gradient(low = "grey", high = "blue") +
  gghighlight::gghighlight(diffdist <= 100) +
  ggrepel::geom_label_repel(
    data = ED_pop,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3) +
  labs(title = "Cities that are equal distance from the Canadian and Mexican border ± 100 km") +
  ggthemes::theme_map()
## Warning: Could not calculate the predicate for layer 1; ignored

#4.1 Cities Within 100 Miles from Borders
uscities %>%
  select(city, state_name, population) %>%
  mutate(dist = st_distance(uscities, USboundaries),
         dist = units::set_units(dist, "km"),
         dist = units::drop_units(dist)) %>%
  filter(dist <= 160) %>%
  st_drop_geometry() ->
  bordercities

bordercities %>%
  summarize(population) %>%
  sum()
## [1] 259935815
data.frame(number_cities = "12283",
           pop = "259935815") -> table

knitr::kable(table,
             caption = "Cities Near National Border",
             col.names = c("Number of Cities", "Population")) %>%
  kable_styling(bootstrap_options = "striped")
Cities Near National Border
Number of Cities Population
12283 259935815